Analysis date: 2023-08-11
CRC_Xenografts_Batch2_DataProcessing Script
# Restore the objects cached by the Batch 2 data-processing step into the
# current environment (presumably including pY_Set4_form used below - confirm).
load(file = "../Data/Cache/Xenografts_Batch2_DataProcessing.RData")
# Fix the RNG state so the randomly initialised k-means run further down is
# reproducible.
set.seed(seed = 2023)
# Shared analysis helpers; sourcing this file also attaches the tidyverse, as
# the startup messages printed right after it show.
source(file = "../../../General/Code/Analysis_Functions.R")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Project-specific helpers for this batch (presumably where functions such as
# Plot_StringDB are defined - confirm against the sourced file).
source(file = "CRC_Xenografts_Batch2_Functions.R")
# Matrix handed to kmeans(): one row per peptide (gene symbol + annotated
# sequence), one column per column of pY_Set4_form whose name contains
# "log2FC".
mat_kmean_pY <- pY_Set4_form %>%
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  # Row names carry the peptide key through kmeans(), so the cluster
  # assignments can be matched back to peptides afterwards.
  column_to_rownames("peptide") %>%
  # contains() is already a selection helper; wrapping it in all_of() (meant
  # for external name/position vectors) was redundant.
  select(contains("log2FC")) %>%
  as.matrix()
# Long-format companion of the k-means matrix: one row per peptide x sample,
# with the sample column name split into its "_"-separated design fields.
kmeans_tb_pY <- pY_Set4_form %>%
  # contains() is a selection helper on its own; all_of() is only for
  # external name/position vectors.
  select(HGNC_Symbol, Annotated_Sequence, contains("log2FC")) %>%
  pivot_longer(
    contains("log2FC"),
    names_to = "sample",
    values_to = "log2FC"
  ) %>%
  separate(
    col = sample,
    sep = "_",
    # The first piece of the column name is stored in a column literally
    # called "remove"; the output columns are kept exactly as before.
    into = c("remove", "xenograft", "treatment",
             "timepoint", "replicate", "set"),
    # Keep the original `sample` column (used as the x axis when plotting).
    # Spelled out as FALSE because `F` is an ordinary, reassignable variable.
    remove = FALSE
  ) %>%
  # Same peptide key as the row names of the k-means matrix, so cluster
  # assignments can be joined on it.
  mutate(peptide = paste0(HGNC_Symbol, "_", Annotated_Sequence))
# Helper from the sourced function files; judging by its name it draws an
# elbow diagnostic for up to c_max clusters - confirm in its definition.
KMeans_Find_Nr_Clusters_elbow(mat_kmean_pY, c_max = 30)

# k-means with 6 centres and 200 random starts. The result depends on the RNG
# state fixed by set.seed() earlier in the script, and the cluster numbers are
# arbitrary labels of this particular run; later sections refer to clusters by
# number, so the call is kept exactly as it was (iter.max = 10 is the kmeans()
# default).
pY_kmeans <- kmeans(mat_kmean_pY, centers = 6, nstart = 200, iter.max = 10)

# One row per peptide: the cluster label plus the matrix row name it belongs to.
cluster_df_pY <- tibble(
  cluster = pY_kmeans$cluster,
  peptide = names(pY_kmeans$cluster)
)

# Attach the cluster label to every peptide x sample row. The key is stated
# explicitly instead of relying on a natural join over whatever columns the
# two tables happen to share (which also silences the "Joining with" message).
kmeans_tb_pY <- left_join(kmeans_tb_pY, cluster_df_pY, by = "peptide")
# Per-cluster log2FC profiles: one line per clustered peptide across samples,
# one facet per k-means cluster, points coloured by treatment.
kmeans_tb_pY %>%
  # Group by `peptide` (gene symbol + sequence), the unit that was actually
  # clustered. Grouping by Annotated_Sequence alone would merge the profiles
  # of any sequence that is annotated to more than one gene symbol.
  ggplot(aes(x = sample, y = log2FC, group = peptide)) +
  geom_line(alpha = 0.2) +
  geom_point(aes(color = treatment), size = 0.2) +
  facet_wrap(~cluster) +
  theme_bw() +
  # Sample names are long; rotate them so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  # PGPalette is defined outside this section; entries 1, 2, 4 and 5 are used
  # for the treatment colours.
  scale_color_manual(values = PGPalette[c(1, 2, 4, 5)])
# Interactive table of the peptide -> cluster assignment (one row per unique
# gene symbol / sequence / cluster combination) with per-column filter boxes
# placed at the top.
kmeans_tb_pY %>%
  select(HGNC_Symbol, Annotated_Sequence, cluster) %>%
  unique() %>%
  DT::datatable(filter = "top")
# Unique gene symbols of the peptides assigned to one k-means cluster.
#   cluster_tb : table with (at least) `HGNC_Symbol` and `cluster` columns
#   cluster_id : cluster number to extract
# Returns a one-column (`HGNC_Symbol`) table with duplicate rows removed.
cluster_gene_symbols <- function(cluster_tb, cluster_id) {
  cluster_tb %>%
    # .data / .env make explicit which side is the column and which is the
    # function argument.
    filter(.data$cluster == .env$cluster_id) %>%
    select(HGNC_Symbol) %>%
    unique()
}

# Only clusters 1, 2, 3 and 6 are passed to Plot_StringDB here. The numbers
# are the labels produced by the seeded kmeans() run above, so they are only
# meaningful for that exact run.
message("Cluster 1")
## Cluster 1
Plot_StringDB(cluster_gene_symbols(kmeans_tb_pY, 1))
message("Cluster 2")
## Cluster 2
Plot_StringDB(cluster_gene_symbols(kmeans_tb_pY, 2))
message("Cluster 3")
## Cluster 3
Plot_StringDB(cluster_gene_symbols(kmeans_tb_pY, 3))
message("Cluster 6")
## Cluster 6
Plot_StringDB(cluster_gene_symbols(kmeans_tb_pY, 6))